************************************************************************************************
* Purpose: Dataset with visits, phone contacts, primary nurse etc. at individual-level in munics that use NOVAX consistently
************************************************************************************************
do "D:\Data\workdata\708296\Project - Mother groups and mental health\Do\global.do"  

*****************************************************************************************************************
* Coverage of NOVAX
*
* We only want municipality*cohort cells with good coverage, i.e. cells where at least 90 pct
* of the kids in the cohort are observed at least once in NOVAX. This is taken as a signal
* that the municipality actually uses NOVAX and that the records are not, e.g., imported
* old data.
*
* The NOVAX kids are merged back on the birth register (mfr) to compute the share of kids
* with a NOVAX record. Some municipalities only have registrations for e.g. 3 pct of their
* kids in a given year (late adoption, imported old data); the coverage requirement is
* meant to omit those cells.
*****************************************************************************************************************

use "$work\sample_novax", clear // every pnr in NOVAX, whether mom, dad or kid
keep if kid == 1                // kids only

* attach each kid's first NOVAX contact; kids without one are discarded
merge 1:1 pnr using "$work\unique_firstcontacts.dta", keep(match) nogenerate

rename KommuneID novaxmuni

* infants only: first contact must fall within one calendar year of the birth year
* (a missing age is discarded as well)
generate first_contact_age = year_firstcontact - mfr_yob
keep if inrange(first_contact_age, -1, 1)

*****************************************************************************************************************
* Build formaps.dta: one row per NOVAX municipality with the first cohort that passes the
* coverage threshold
*****************************************************************************************************************

generate innovax = 1
drop mom dad

* bring in the full birth register; kids found only there get innovax = 0
merge 1:1 pnr using "$work\mfr_kids", nogenerate
replace innovax = 0 if missing(innovax)

* NOTE(review): rows that come only from mfr_kids have no novaxmuni unless mfr_kids itself
* carries a variable with that name. If it does not, every non-missing novaxmuni cell has
* innovax == 1 and the 90 pct rule never binds - confirm against the data.
collapse (mean) innovax, by(novaxmuni mfr_yob)
sort novaxmuni mfr_yob

* assumption: a municipality registers correctly once 90 pct of a cohort has a nurse contact
generate treated = mfr_yob if innovax >= 0.9

* earliest qualifying cohort per municipality (missing if no cohort qualifies)
collapse (min) treated, by(novaxmuni)
sort treated

generate mfr_muni = novaxmuni
save "$work\formaps.dta",replace // author's recorded count: 98 municipalities, 61 enter at some point

* Attach the municipality entry year ("treated") to every NOVAX contact and keep only
* contacts dated in or after that year, in municipalities that ever reach good coverage.

use "$work\formaps.dta",clear // one row per municipality with its entry year
rename mfr_muni KommuneID

* all contacts and their type, matched to the municipality-level entry year
merge 1:m KommuneID using "$work\contacts_types.dta", nogenerate

* calendar year of the contact, compared with the municipality's entry year
generate contact_year = year(date)

* keep spells from the entry year onwards; municipalities without an entry year
* (treated missing, i.e. non-NOVAX municipalities) are removed altogether
keep if contact_year >= treated & treated != .
drop contact_year

rename KommuneID muni 
save "$work\relevantcontacts.dta",replace
* author's recorded count: 2.9 mio contacts in 2012-2017 (contacts are only observed up to
* 2017, i.e. the birth year of the final cohort)
* N = 2,943,921


*****************************************************************************************************************
* We now have mom, dad and kid contacts - but who belongs to the same family?
*
* The end goal is a dataset that has the RELEVANT kid's pnr as the identifier.
*   - kid contacts: easy, it is the kid itself
*   - mom/dad contacts: the registered pnr is the parent's, and parents can have several
*     kids, so we need to figure out which kid the contact was about
*****************************************************************************************************************
use "$work\sample_novax", clear // all moms, dads and kids in the NOVAX data

* keep persons that have at least one contact in a good-coverage municipality*year
merge 1:m pnr using "$work\relevantcontacts.dta", keep(match) nogenerate

save "$work\contacts_allmerged.dta",replace // N=2,733,054

* write one contact file per role, based on the role indicators
foreach role in kid mom dad {
    preserve
    keep if `role' == 1
    save "$work\contacts_`role'.dta",replace
    restore
}

* Kid contacts: link to the birth register to obtain parents' ids and the family link,
* so that visits etc. can later be counted at family level.
* Important to keep the focus on the FOCAL child - e.g. a mom's contact: for which child?

use "$work\contacts_kid.dta",clear
merge m:1 pnr using "$work\mfr_kids", keep(match) nogenerate // only matched kid contacts are kept

save "$work\contacts_kid_family.dta",replace //N=2,082,704


* Mom and dad contacts: find the relevant kid for every contact and make that kid's pnr
* the pnr of the contact spell. The parent's own pnr is kept as pnrmom / pnrdad.
foreach parent in mom dad {
    use "$work\contacts_`parent'.dta",clear

    * wide file with up to five kids per parent (pnr1-pnr5, mfr_dob1-mfr_dob5, mfr_muni1-mfr_muni5)
    merge m:1 pnr using "$work\the`parent's_wide", keep(match) nogenerate

    * absolute distance in days between the contact date and each kid's birth date
    forvalues k = 1/5 {
        generate help`k' = abs(date - mfr_dob`k')
    }

    * smallest distance across the five kids
    egen help=rowmin(help1-help5)

    * relevant kid: the kid whose birth date is closest to the date of this contact.
    * With ties the highest-numbered kid wins because later iterations overwrite earlier ones.
    generate relevantkid = ""
    generate dob = .
    generate mfr_muni = .

    forvalues k = 1/5 {
        replace relevantkid = pnr`k'      if help`k' == help
        replace dob         = mfr_dob`k'  if help`k' == help
        replace mfr_muni    = mfr_muni`k' if help`k' == help
    }

    drop mfr_yob // carried over from the mfr kid merge
    generate yob = year(dob)

    * drop the wide kid columns, then swap identifiers: the parent's pnr moves to
    * pnrmom / pnrdad and the relevant kid becomes pnr
    drop pnr1-mfr_dob5 
    rename pnr pnr`parent'
    rename relevantkid pnr

    save "$work\contacts_`parent'_family.dta",replace
}



* Append kid, mom and dad contacts. pnr is now always the kid, also for contacts that
* NOVAX registered on the mom or dad of that kid.
use "$work\contacts_kid_family.dta",clear
append using "$work\contacts_mom_family.dta"
append using "$work\contacts_dad_family.dta" // N=2,733,054

* Parent-registered spells carry the focal child's birth date / birth year in dob / yob
* (set when the relevant kid was chosen), whereas kid spells carry them in mfr_dob /
* mfr_yob. Everything downstream (age at contact, the final one-row-per-kid file) reads
* mfr_dob / mfr_yob only, so copy the focal child's values across for parent rows before
* the helper columns are dropped. Parent rows are those with a non-empty pnrmom or pnrdad.
replace mfr_dob = dob if (pnrmom != "" | pnrdad != "") & !missing(dob)
replace mfr_yob = yob if (pnrmom != "" | pnrdad != "") & !missing(yob)

sort pnr date
order pnr id_*

* drop spells whose focal child was born more than one year before the municipality's
* entry year. NOTE(review): yob is only defined on parent rows, so kid rows are never
* affected by this line - confirm that this is intended.
drop if yob<treated-1
compress
*******************************************************************************
* All contacts in a family, each assigned to the relevant kid's id. Still needs cleaning
* for duplicates (the nurse may have made multiple entries for one contact, e.g. a visit
* registered on both mom and kid gives two spells).
*
order pnr mom dad kid

* NOTE(review): both drops are positional ranges and depend on the column order produced
* by the appends above; presumably the second range also removes dob, yob and the help*
* columns - verify with describe.
drop id_father1 -  mfr_dob5
drop pnrmom - pnrdad
save "$work\familycontacts.dta",replace // N=2,733,035
*******************************************************************************

*******************************************************************************
* RECODING/OUTCOME VARS from here
* Generate variables on the family's contacts and other nurse treatments:
*   - universal visits: number and date of first visit
*   - behov (need) visits: number of targeted contacts (on top of the regular visits)
*   - calls and other contacts: number
*   - nurses' reports of concern and referrals
*******************************************************************************

use "$work\familycontacts.dta", clear 

* Omit duplicates: for each contact-type flag in turn keep one row per pnr*date*flag value.
* NOTE(review): rows that share pnr and date and are both 0 on a flag are also treated as
* duplicates, so different contact types on the same day can collapse into one row -
* confirm that this is intended.
foreach var of varlist visit consultation behov phone lettermail group henvist underret openhouse grav {
    duplicates drop pnr date `var', force
}

* Cluster on family: pnr is the relevant kid's pnr, also for mom and dad spells.
* A universal visit is a visit that is not flagged as a need (behov) visit.
gen univisit = (visit==1 & behov!=1)

* phone or letter/mail contact combined into one indicator
gen phoneex = max(phone, lettermail)
duplicates drop pnr date phoneex, force
drop phone

* A spell is not a universal visit if more than one of the flags below is set.
* phoneex is written out in full: the raw phone flag was dropped just above, and a bare
* "phone" here would only work through Stata's variable-abbreviation matching (it resolves
* to phoneex) and would stop with an error under "set varabbrev off".
gen tot = univisit + consultation + behov + phoneex + lettermail + group + henvist + underret + openhouse + grav
replace univisit = 0 if tot > 1 // also true when tot is missing, i.e. when any flag is missing

* age at contact in months (1 = first month of life); keep the first 12 months only
gen age_at = floor((date - mfr_dob)/30.25) + 1
keep if age_at > 0 & age_at < 13

rename univisit uni
rename behov need
rename phoneex phone

* What we want: first visit date, number of universal/targeted visits, number of calls and
* letters, date of first report of concern, number of reports of concern.

* number of universal / targeted visits
bys pnr: egen y_univisits = total(uni)
bys pnr: egen y_needvisits = total(need)

label var y_univisits "Number of first year universal visits"
label var y_needvisits "Number of first year targeted visits"

* date of first universal visit
gen first = date if uni==1
bys pnr: egen y_datefirst = min(first)
drop first

label var y_datefirst "Date of first visit"

* calls/letters
bys pnr: egen y_phone = total(phone)

label var y_phone "Number of phonee/postal contacts"

* reports of concern: date of the first one and their number
gen first = date if underret==1
bys pnr: egen y_dateunderret = min(first)
drop first

label var y_dateunderret "Date of first concern filed to authorities"

bys pnr: egen y_underret = total(underret)
label var y_underret "Number of concerns filed to authorities"

* referrals
bys pnr: egen y_henvist = total(henvist)
label var y_henvist "Number of referrals"


********************************************************************************
* Outcomes at a specific age for each kid: cumulative number of universal visits,
* need visits and phone/letter contacts up to and including each month of age 1-12.
* Runs on a copy of the spell data, which is restored afterwards.
********************************************************************************
preserve 

* contacts per kid and month of age, then one column per month
collapse (sum) uni need phone, by(pnr age_at)
reshape wide uni need phone, i(pnr) j(age_at)

* months in which a kid has no spell are missing after the reshape: set them to zero
foreach v of varlist uni* need* phone* {
    replace `v' = 0 if missing(`v')
}

* running totals: <stub>m1 is month 1, <stub>m<i> adds month i to <stub>m<i-1>
foreach stub in uni need phone {
    generate `stub'm1 = `stub'1
}
foreach stub in uni need phone {
    forvalues m = 2/12 {
        generate `stub'm`m' = `stub'm`=`m'-1'' + `stub'`m'
    }
}

keep pnr unim* needm*  phonem*
save "$work\novax_atspecificmonth", replace
restore


********************************************************************************
* Down to one observation per kid now that all spell-level recoding is done.
* The y_* outcomes are constant within pnr.
* NOTE(review): the sort does not fix which spell comes first within a pnr, so the
* values of id_*, kid and mfr_* come from an unspecified spell - confirm that they are
* constant within pnr.
********************************************************************************

* indicator for at least one referral from the nurse
generate y_anyreferral = cond(y_henvist > 0, 1, 0)
label var y_anyreferral "Any referral from nurse"

by pnr, sort: drop if _n > 1

keep pnr id_* kid mfr_yob mfr_muni mfr_dob   y_*

********************************************************************************
save "$work\novax_contacts_final", replace //229,525

********************************************************************************




********************************************************************************
* First/primary nurse indicator: the nurse (BesøgendeSHPL) recorded on the kid's
* earliest visit that has a nurse recorded. Output: one row per pnr with shp_first.
********************************************************************************
use "$work\familycontacts.dta", clear

* Restrict to visits with a recorded nurse BEFORE picking the first one. Removing
* same-day duplicates first could keep an entry with a blank nurse and discard the
* same-day entry that carries the nurse id, pushing the "first nurse" to a later visit.
keep if visit == 1 & BesøgendeSHPL != ""

* earliest visit per kid; several nurses on the same first date are resolved
* deterministically (lowest nurse id in sort order) instead of by arbitrary row order
bysort pnr (date BesøgendeSHPL): keep if _n == 1

rename BesøgendeSHPL shp_first
keep pnr shp_first
save "$work\shp_first", replace

